Milestone 1: We created a folder structure for the dataset with train and validation splits: the images and labels directories are each split into train and val subdirectories. The data is split 80/20 between training and validation, each split has a balanced representation of both classes, and there are 500 images per class overall.
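To verify the balance claim, here is a small sanity-check sketch (an addition for illustration, not part of the original run) that counts class occurrences directly in the annotation files used throughout this notebook:
import os

for split in ("train", "val"):
    ann_file = f"dataset/labels/{split}_annotations.txt"
    with open(ann_file) as f:
        lines = [l for l in f if l.strip()]
    homer = sum("homer_simpson" in line for line in lines)
    print(f"{split}: {homer} homer_simpson / {len(lines)} annotations total")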
import os
import random

import cv2
import matplotlib.pyplot as plt
import numpy as np
import torch
from PIL import Image
from tensorflow.keras import layers, Sequential, regularizers
from tensorflow.keras.callbacks import EarlyStopping  # public API path, not tensorflow.python.keras
from ultralytics import YOLO
def load_dataset_from_annotation(annotation_file, images_base_path, img_size=(128, 128)):
    images = []
    labels = []
    with open(annotation_file, 'r') as f:
        for line in f:
            parts = line.strip().split(',')
            if len(parts) >= 2:
                img_path = parts[0].strip()
                full_path = os.path.join(images_base_path, img_path)
                label = 1 if "homer_simpson" in line else 0
                try:
                    img = Image.open(full_path).convert('RGB')
                    img = img.resize(img_size)
                    img_array = np.array(img) / 255.0  # Normalize to [0, 1]
                    images.append(img_array)
                    labels.append(label)
                except Exception as e:
                    print(f"Error loading {full_path}: {e}")
                    continue
    return np.array(images), np.array(labels)
X_train, y_train = load_dataset_from_annotation(
    "dataset/labels/train_annotations.txt",
    "dataset",
    img_size=(128, 128)
)
X_val, y_val = load_dataset_from_annotation(
    "dataset/labels/val_annotations.txt",
    "dataset",
    img_size=(128, 128)
)
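Before training, a quick sanity check that the arrays have the expected shapes and that the splits really are balanced (a minimal sketch over the arrays loaded above):
print("Train:", X_train.shape, "Val:", X_val.shape)
print("Train class counts [non-Homer, Homer]:", np.bincount(y_train))  # expect roughly 400/400
print("Val class counts [non-Homer, Homer]:", np.bincount(y_val))      # expect roughly 100/100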
# visualize images
plt.figure(figsize=(10, 5))
for i in range(12):
    plt.subplot(3, 4, i + 1)
    plt.imshow(X_train[i])
    plt.title("Homer" if y_train[i] == 1 else "Non-Homer")
    plt.axis('off')
plt.tight_layout()
plt.show()
# baseline model
epochs = 20
batch_size = 32
baseline_model = Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', padding='same',
                  input_shape=(128, 128, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(256, (3, 3), activation='relu', padding='same'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])
baseline_model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy', 'Precision', 'Recall']
)
history = baseline_model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=epochs,
    batch_size=batch_size
)
print("\nTraining metrics:")
train_results = baseline_model.evaluate(X_train, y_train, verbose=0)
print(f"Loss: {train_results[0]:.4f}, Accuracy: {train_results[1]:.4f}")
print("\nValidation metrics:")
base_val_results = baseline_model.evaluate(X_val, y_val, verbose=0)
print(f"Loss: {base_val_results[0]:.4f}, Accuracy: {base_val_results[1]:.4f}")
E:\Code\ML\homiedar\.venv\Lib\site-packages\keras\src\layers\convolutional\base_conv.py:113: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead. super().__init__(activity_regularizer=activity_regularizer, **kwargs)
Epoch  1/20 - Precision: 0.5077 - Recall: 0.4925 - accuracy: 0.5075 - loss: 0.7235 - val_Precision: 0.5359 - val_Recall: 0.8200 - val_accuracy: 0.5550 - val_loss: 0.6875
Epoch  2/20 - Precision: 0.5968 - Recall: 0.6475 - accuracy: 0.6050 - loss: 0.6687 - val_Precision: 0.7143 - val_Recall: 0.5000 - val_accuracy: 0.6500 - val_loss: 0.6304
Epoch  3/20 - Precision: 0.7076 - Recall: 0.5325 - accuracy: 0.6562 - loss: 0.6443 - val_Precision: 0.7396 - val_Recall: 0.7100 - val_accuracy: 0.7300 - val_loss: 0.6221
Epoch  4/20 - Precision: 0.6962 - Recall: 0.6875 - accuracy: 0.6938 - loss: 0.5914 - val_Precision: 0.7037 - val_Recall: 0.5700 - val_accuracy: 0.6650 - val_loss: 0.6002
Epoch  5/20 - Precision: 0.7307 - Recall: 0.7325 - accuracy: 0.7312 - loss: 0.5427 - val_Precision: 0.7711 - val_Recall: 0.6400 - val_accuracy: 0.7250 - val_loss: 0.5442
Epoch  6/20 - Precision: 0.7872 - Recall: 0.7400 - accuracy: 0.7700 - loss: 0.4874 - val_Precision: 0.7589 - val_Recall: 0.8500 - val_accuracy: 0.7900 - val_loss: 0.4722
Epoch  7/20 - Precision: 0.8025 - Recall: 0.7925 - accuracy: 0.7987 - loss: 0.4321 - val_Precision: 0.7864 - val_Recall: 0.8100 - val_accuracy: 0.7950 - val_loss: 0.4351
Epoch  8/20 - Precision: 0.8159 - Recall: 0.7975 - accuracy: 0.8087 - loss: 0.3944 - val_Precision: 0.8020 - val_Recall: 0.8100 - val_accuracy: 0.8050 - val_loss: 0.4296
Epoch  9/20 - Precision: 0.8476 - Recall: 0.7925 - accuracy: 0.8250 - loss: 0.4027 - val_Precision: 0.8451 - val_Recall: 0.6000 - val_accuracy: 0.7450 - val_loss: 0.5424
Epoch 10/20 - Precision: 0.8501 - Recall: 0.8225 - accuracy: 0.8388 - loss: 0.3915 - val_Precision: 0.8247 - val_Recall: 0.8000 - val_accuracy: 0.8150 - val_loss: 0.4227
Epoch 11/20 - Precision: 0.8741 - Recall: 0.8675 - accuracy: 0.8712 - loss: 0.2940 - val_Precision: 0.8526 - val_Recall: 0.8100 - val_accuracy: 0.8350 - val_loss: 0.4108
Epoch 12/20 - Precision: 0.8995 - Recall: 0.8725 - accuracy: 0.8875 - loss: 0.2519 - val_Precision: 0.7736 - val_Recall: 0.8200 - val_accuracy: 0.7900 - val_loss: 0.5028
Epoch 13/20 - Precision: 0.9173 - Recall: 0.9150 - accuracy: 0.9162 - loss: 0.1844 - val_Precision: 0.7917 - val_Recall: 0.7600 - val_accuracy: 0.7800 - val_loss: 0.4955
Epoch 14/20 - Precision: 0.9179 - Recall: 0.9225 - accuracy: 0.9200 - loss: 0.1682 - val_Precision: 0.7900 - val_Recall: 0.7900 - val_accuracy: 0.7900 - val_loss: 0.5938
Epoch 15/20 - Precision: 0.9500 - Recall: 0.9500 - accuracy: 0.9500 - loss: 0.1453 - val_Precision: 0.8022 - val_Recall: 0.7300 - val_accuracy: 0.7750 - val_loss: 0.6714
Epoch 16/20 - Precision: 0.9579 - Recall: 0.9675 - accuracy: 0.9625 - loss: 0.1008 - val_Precision: 0.8367 - val_Recall: 0.8200 - val_accuracy: 0.8300 - val_loss: 0.5530
Epoch 17/20 - Precision: 0.9728 - Recall: 0.9825 - accuracy: 0.9775 - loss: 0.0764 - val_Precision: 0.8387 - val_Recall: 0.7800 - val_accuracy: 0.8150 - val_loss: 0.5896
Epoch 18/20 - Precision: 0.9825 - Recall: 0.9800 - accuracy: 0.9812 - loss: 0.0538 - val_Precision: 0.8163 - val_Recall: 0.8000 - val_accuracy: 0.8100 - val_loss: 0.7155
Epoch 19/20 - Precision: 0.9775 - Recall: 0.9775 - accuracy: 0.9775 - loss: 0.0538 - val_Precision: 0.8875 - val_Recall: 0.7100 - val_accuracy: 0.8100 - val_loss: 0.8836
Epoch 20/20 - Precision: 0.9899 - Recall: 0.9850 - accuracy: 0.9875 - loss: 0.0430 - val_Precision: 0.7980 - val_Recall: 0.7900 - val_accuracy: 0.7950 - val_loss: 0.6915

Training metrics:
Loss: 0.0186, Accuracy: 0.9975

Validation metrics:
Loss: 0.6915, Accuracy: 0.7950
We got massive overfitting! By the last epoch, training accuracy is 99.75% while validation accuracy is stuck around 79.5%, and validation loss has been climbing since mid-training. Validation performance is decent anyway, but we have to close that gap. Let's apply some regularization: L2 weight penalties on the dense layers, Dropout after them, and a smaller dense head.
regularized_model = Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', padding='same',
                  input_shape=(128, 128, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(256, (3, 3), activation='relu', padding='same'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(128, activation='relu',  # L2-regularized dense head
                 kernel_regularizer=regularizers.l2(0.01)),
    layers.Dropout(0.5),
    layers.Dense(64, activation='relu',  # reduced neurons (was 128)
                 kernel_regularizer=regularizers.l2(0.01)),
    layers.Dropout(0.3),
    layers.Dense(1, activation='sigmoid')
])
regularized_model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy', 'Precision', 'Recall']
)
history = regularized_model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=epochs,
    batch_size=batch_size
)
print("\nTraining metrics:")
train_results = regularized_model.evaluate(X_train, y_train, verbose=0)
print(f"Loss: {train_results[0]:.4f}, Accuracy: {train_results[1]:.4f}")
print("\nValidation metrics:")
reg1_val_results = regularized_model.evaluate(X_val, y_val, verbose=0)
print(f"Loss: {reg1_val_results[0]:.4f}, Accuracy: {reg1_val_results[1]:.4f}")
Epoch  1/20 - Precision: 0.4845 - Recall: 0.4675 - accuracy: 0.4850 - loss: 2.1907 - val_Precision: 0.5000 - val_Recall: 1.0000 - val_accuracy: 0.5000 - val_loss: 1.4076
Epoch  2/20 - Precision: 0.4993 - Recall: 0.9025 - accuracy: 0.4988 - loss: 1.2321 - val_Precision: 0.5000 - val_Recall: 1.0000 - val_accuracy: 0.5000 - val_loss: 1.0857
Epoch  3/20 - Precision: 0.5045 - Recall: 0.4200 - accuracy: 0.5038 - loss: 1.0091 - val_Precision: 0.5000 - val_Recall: 1.0000 - val_accuracy: 0.5000 - val_loss: 0.9391
Epoch  4/20 - Precision: 0.5020 - Recall: 0.6125 - accuracy: 0.5025 - loss: 0.8940 - val_Precision: 0.6364 - val_Recall: 0.0700 - val_accuracy: 0.5150 - val_loss: 0.8523
Epoch  5/20 - Precision: 0.5652 - Recall: 0.6500 - accuracy: 0.5750 - loss: 0.8167 - val_Precision: 0.7333 - val_Recall: 0.6600 - val_accuracy: 0.7100 - val_loss: 0.7636
Epoch  6/20 - Precision: 0.6210 - Recall: 0.6350 - accuracy: 0.6237 - loss: 0.7554 - val_Precision: 0.7128 - val_Recall: 0.6700 - val_accuracy: 0.7000 - val_loss: 0.6649
Epoch  7/20 - Precision: 0.7456 - Recall: 0.6375 - accuracy: 0.7100 - loss: 0.6666 - val_Precision: 0.8421 - val_Recall: 0.6400 - val_accuracy: 0.7600 - val_loss: 0.5884
Epoch  8/20 - Precision: 0.7540 - Recall: 0.7050 - accuracy: 0.7375 - loss: 0.6384 - val_Precision: 0.8182 - val_Recall: 0.7200 - val_accuracy: 0.7800 - val_loss: 0.5361
Epoch  9/20 - Precision: 0.7634 - Recall: 0.6775 - accuracy: 0.7337 - loss: 0.6193 - val_Precision: 0.7500 - val_Recall: 0.7800 - val_accuracy: 0.7600 - val_loss: 0.5838
Epoch 10/20 - Precision: 0.7686 - Recall: 0.7225 - accuracy: 0.7525 - loss: 0.5777 - val_Precision: 0.9318 - val_Recall: 0.4100 - val_accuracy: 0.6900 - val_loss: 0.6097
Epoch 11/20 - Precision: 0.7923 - Recall: 0.7250 - accuracy: 0.7675 - loss: 0.5536 - val_Precision: 0.9138 - val_Recall: 0.5300 - val_accuracy: 0.7400 - val_loss: 0.5862
Epoch 12/20 - Precision: 0.7828 - Recall: 0.7300 - accuracy: 0.7638 - loss: 0.5992 - val_Precision: 0.8462 - val_Recall: 0.6600 - val_accuracy: 0.7700 - val_loss: 0.5964
Epoch 13/20 - Precision: 0.7914 - Recall: 0.7400 - accuracy: 0.7725 - loss: 0.5722 - val_Precision: 0.9143 - val_Recall: 0.6400 - val_accuracy: 0.7900 - val_loss: 0.5140
Epoch 14/20 - Precision: 0.8464 - Recall: 0.7300 - accuracy: 0.7987 - loss: 0.5267 - val_Precision: 0.8478 - val_Recall: 0.7800 - val_accuracy: 0.8200 - val_loss: 0.4739
Epoch 15/20 - Precision: 0.8257 - Recall: 0.7225 - accuracy: 0.7850 - loss: 0.5029 - val_Precision: 0.7895 - val_Recall: 0.7500 - val_accuracy: 0.7750 - val_loss: 0.5103
Epoch 16/20 - Precision: 0.8119 - Recall: 0.7875 - accuracy: 0.8025 - loss: 0.4855 - val_Precision: 0.8592 - val_Recall: 0.6100 - val_accuracy: 0.7550 - val_loss: 0.5236
Epoch 17/20 - Precision: 0.8547 - Recall: 0.7650 - accuracy: 0.8175 - loss: 0.4667 - val_Precision: 0.7838 - val_Recall: 0.8700 - val_accuracy: 0.8150 - val_loss: 0.5170
Epoch 18/20 - Precision: 0.8325 - Recall: 0.8200 - accuracy: 0.8275 - loss: 0.4463 - val_Precision: 0.8462 - val_Recall: 0.7700 - val_accuracy: 0.8150 - val_loss: 0.5029
Epoch 19/20 - Precision: 0.8723 - Recall: 0.8025 - accuracy: 0.8425 - loss: 0.4363 - val_Precision: 0.8469 - val_Recall: 0.8300 - val_accuracy: 0.8400 - val_loss: 0.4891
Epoch 20/20 - Precision: 0.8933 - Recall: 0.8375 - accuracy: 0.8687 - loss: 0.4000 - val_Precision: 0.8261 - val_Recall: 0.7600 - val_accuracy: 0.8000 - val_loss: 0.5076

Training metrics:
Loss: 0.3385, Accuracy: 0.8938

Validation metrics:
Loss: 0.5076, Accuracy: 0.8000
That brought the overfitting down: training accuracy is now 89.4% against 80.0% on validation, instead of a 20-point gap. Validation accuracy is still around the same, but there's more we can get out of EarlyStopping, which halts training once val_loss stops improving and restores the best weights. Note that this next fit() call continues training the already-fitted regularized_model rather than starting from scratch.
history = regularized_model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=epochs,
    callbacks=[EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)],
    batch_size=batch_size
)
print("\nTraining metrics:")
train_results = regularized_model.evaluate(X_train, y_train, verbose=0)
print(f"Loss: {train_results[0]:.4f}, Accuracy: {train_results[1]:.4f}")
print("\nValidation metrics:")
reg2_val_results = regularized_model.evaluate(X_val, y_val, verbose=0)
print(f"Loss: {reg2_val_results[0]:.4f}, Accuracy: {reg2_val_results[1]:.4f}")
Epoch 1/20 - Precision: 0.8953 - Recall: 0.8550 - accuracy: 0.8775 - loss: 0.3804 - val_Precision: 0.8690 - val_Recall: 0.7300 - val_accuracy: 0.8100 - val_loss: 0.5173
Epoch 2/20 - Precision: 0.9171 - Recall: 0.8300 - accuracy: 0.8775 - loss: 0.3749 - val_Precision: 0.8462 - val_Recall: 0.7700 - val_accuracy: 0.8150 - val_loss: 0.4736
Epoch 3/20 - Precision: 0.9231 - Recall: 0.8400 - accuracy: 0.8850 - loss: 0.3567 - val_Precision: 0.7788 - val_Recall: 0.8800 - val_accuracy: 0.8150 - val_loss: 0.5685
Epoch 4/20 - Precision: 0.9104 - Recall: 0.7875 - accuracy: 0.8550 - loss: 0.4428 - val_Precision: 0.7807 - val_Recall: 0.8900 - val_accuracy: 0.8200 - val_loss: 0.5831
Epoch 5/20 - Precision: 0.8930 - Recall: 0.8350 - accuracy: 0.8675 - loss: 0.4092 - val_Precision: 0.8000 - val_Recall: 0.7600 - val_accuracy: 0.7850 - val_loss: 0.5890
Epoch 6/20 - Precision: 0.9324 - Recall: 0.8625 - accuracy: 0.9000 - loss: 0.3363 - val_Precision: 0.8280 - val_Recall: 0.7700 - val_accuracy: 0.8050 - val_loss: 0.6636
Epoch 7/20 - Precision: 0.9160 - Recall: 0.8725 - accuracy: 0.8963 - loss: 0.3422 - val_Precision: 0.8902 - val_Recall: 0.7300 - val_accuracy: 0.8200 - val_loss: 0.5338

Training metrics:
Loss: 0.3193, Accuracy: 0.9062

Validation metrics:
Loss: 0.4736, Accuracy: 0.8150
With the addition of regularization and EarlyStopping, validation accuracy is now up to 81.5%. Training stopped after 7 epochs of this run, and EarlyStopping restored the best weights (epoch 2, val_loss 0.4736).
regularized_model.save('homer_classifier_model.h5')
WARNING:absl:You are saving your model as an HDF5 file via `model.save()` or `keras.saving.save_model(model)`. This file format is considered legacy. We recommend using instead the native Keras format, e.g. `model.save('my_model.keras')` or `keras.saving.save_model(model, 'my_model.keras')`.
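As the warning says, HDF5 is considered legacy; saving in the native Keras format is a one-line change (same model object, different extension):
regularized_model.save('homer_classifier_model.keras')  # native Keras format, no legacy warning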
predictions = regularized_model.predict(X_val)
predicted_labels = (predictions > 0.5).astype(int).flatten()
plt.figure(figsize=(15, 10))
num_images = 20
for i in range(num_images):
    plt.subplot(4, 5, i + 1)
    plt.imshow(X_val[i])
    pred = predicted_labels[i]
    actual = y_val[i]
    confidence = predictions[i][0] if pred == 1 else 1 - predictions[i][0]
    color = 'green' if pred == actual else 'red'
    pred_label = "Homer" if pred == 1 else "Non-Homer"
    actual_label = "Homer" if actual == 1 else "Non-Homer"
    plt.title(f"Pred: {pred_label}\nActual: {actual_label}\nConf: {confidence:.2f}",
              color=color, fontsize=9)
    plt.axis('off')
plt.tight_layout()
plt.show()
correct = np.sum(predicted_labels == y_val)
total = len(y_val)
accuracy = correct / total
print(f"\nValidation Set Performance:")
print(f"Correct predictions: {correct}/{total}")
print(f"Accuracy: {accuracy:.2%}")
print(f"\nConfusion Matrix:")
print(f"True Positives (Homer predicted as Homer): {np.sum((predicted_labels == 1) & (y_val == 1))}")
print(f"True Negatives (Non-Homer predicted as Non-Homer): {np.sum((predicted_labels == 0) & (y_val == 0))}")
print(f"False Positives (Non-Homer predicted as Homer): {np.sum((predicted_labels == 1) & (y_val == 0))}")
print(f"False Negatives (Homer predicted as Non-Homer): {np.sum((predicted_labels == 0) & (y_val == 1))}")
7/7 ━━━━━━━━━━━━━━━━━━━━ 0s 43ms/step
Validation Set Performance:
Correct predictions: 163/200
Accuracy: 81.50%

Confusion Matrix:
True Positives (Homer predicted as Homer): 77
True Negatives (Non-Homer predicted as Non-Homer): 86
False Positives (Non-Homer predicted as Homer): 14
False Negatives (Homer predicted as Non-Homer): 23
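From these counts we can also derive precision, recall, and F1 for the Homer class directly (a small sketch; sklearn.metrics.classification_report would report the same numbers if scikit-learn is available):
tp, tn, fp, fn = 77, 86, 14, 23
precision = tp / (tp + fp)                          # 77 / 91  ≈ 0.846
recall = tp / (tp + fn)                             # 77 / 100 = 0.770
f1 = 2 * precision * recall / (precision + recall)  # ≈ 0.806
print(f"Precision: {precision:.3f}, Recall: {recall:.3f}, F1: {f1:.3f}")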
Let's now convert the annotations to YOLO format and create the YAML file for object-detection training. Note that the current annotations store pixel box corners (x1, y1, x2, y2), while YOLO expects a class id followed by a center-normalized box (x_center, y_center, width, height), each divided by the image width or height.
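As a worked example with made-up numbers: a corner box (x1, y1, x2, y2) = (40, 60, 200, 220) in a 400x300 image converts like this:
x1, y1, x2, y2, W, H = 40, 60, 200, 220, 400, 300
x_center = ((x1 + x2) / 2) / W  # 120 / 400 = 0.300
y_center = ((y1 + y2) / 2) / H  # 140 / 300 ≈ 0.467
width = (x2 - x1) / W           # 160 / 400 = 0.400
height = (y2 - y1) / H          # 160 / 300 ≈ 0.533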
def create_yolo_labels(annotation_file, img_dir, label_dir):
    os.makedirs(label_dir, exist_ok=True)
    with open(annotation_file, 'r') as f:
        lines = f.readlines()
    for line in lines:
        if ',' in line:
            parts = line.strip().split(',')
            img_path = parts[0]
            img_name = os.path.basename(img_path)
            x1, y1, x2, y2 = map(float, parts[1:5])
            class_label = parts[5]
            actual_img = f"{img_dir}/{img_name}"
            if not os.path.exists(actual_img):
                continue
            try:
                with Image.open(actual_img) as img:
                    W, H = img.size
            except Exception:
                continue
            # Normalize corner order, then convert to center-normalized YOLO coordinates
            left = min(x1, x2)
            right = max(x1, x2)
            top = min(y1, y2)
            bottom = max(y1, y2)
            width = right - left
            height = bottom - top
            x_center = (left + width / 2) / W
            y_center = (top + height / 2) / H
            width_norm = width / W
            height_norm = height / H
            class_id = 0 if class_label == 'homer_simpson' else 1
            txt_name = img_name.rsplit('.', 1)[0] + '.txt'
            txt_path = f"{label_dir}/{txt_name}"
            with open(txt_path, 'w') as f:
                f.write(f"{class_id} {x_center:.6f} {y_center:.6f} {width_norm:.6f} {height_norm:.6f}\n")
create_yolo_labels(
    "dataset/labels/train_annotations.txt",
    "dataset/images/train",
    "dataset/labels/train"
)
create_yolo_labels(
    "dataset/labels/val_annotations.txt",
    "dataset/images/val",
    "dataset/labels/val"
)
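A quick check that every image got a matching label file (a sketch using the same directory layout as above):
for split in ("train", "val"):
    n_imgs = len(os.listdir(f"dataset/images/{split}"))
    n_lbls = len([f for f in os.listdir(f"dataset/labels/{split}") if f.endswith(".txt")])
    print(f"{split}: {n_imgs} images, {n_lbls} label files")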
yaml_content = """path: ./dataset
train: images/train
val: images/val
nc: 2
names: ['homer_simpson', 'not_homer']
"""
with open("simpsons.yaml", "w") as f:
f.write(yaml_content)
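To make sure the YAML parses as intended, a minimal check (assumes PyYAML, which Ultralytics already pulls in as a dependency):
import yaml
with open("simpsons.yaml") as f:
    print(yaml.safe_load(f))  # expect keys: path, train, val, nc, names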
Now let's double check that the YOLO labels are correct by visualizing some images with their bounding boxes.
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import os
from PIL import Image
import numpy as np
np.random.seed(42)
all_images = []
for root, dirs, files in os.walk("dataset/images/train"):
    for f in files:
        if f.lower().endswith(('.jpg', '.png', '.jpeg')):
            all_images.append(f)
selected = np.random.choice(all_images, 6, replace=False)
fig, axes = plt.subplots(2, 3, figsize=(15, 10))
axes = axes.flatten()
for idx, img_name in enumerate(selected):
    ax = axes[idx]
    img_path = f"dataset/images/train/{img_name}"
    img = Image.open(img_path).convert('RGB')
    img_array = np.array(img)
    H, W = img_array.shape[:2]
    # Find the matching YOLO annotation (.txt file)
    txt_name = img_name.rsplit('.', 1)[0] + '.txt'
    txt_path = f"dataset/labels/train/{txt_name}"
    ax.imshow(img_array)
    if os.path.exists(txt_path):
        # Read YOLO format
        with open(txt_path, 'r') as f:
            line = f.readline().strip()
        if line:
            parts = line.split()
            if len(parts) == 5:
                class_id, x_center, y_center, width_norm, height_norm = map(float, parts)
                # Convert YOLO coordinates back to pixels
                x_center_px = x_center * W
                y_center_px = y_center * H
                width_px = width_norm * W
                height_px = height_norm * H
                left = x_center_px - width_px / 2
                top = y_center_px - height_px / 2
                # Draw box
                rect = patches.Rectangle(
                    (left, top), width_px, height_px,
                    linewidth=3, edgecolor='red', facecolor='none'
                )
                ax.add_patch(rect)
                class_name = "Homer" if class_id == 0 else "Not Homer"
                ax.text(left, top - 10, class_name,
                        color='red', fontsize=11, fontweight='bold',
                        bbox=dict(facecolor='white', alpha=0.8))
                # Show normalized YOLO coordinates
                ax.text(5, 15, f"YOLO: ({x_center:.2f},{y_center:.2f})",
                        color='blue', fontsize=9,
                        bbox=dict(facecolor='white', alpha=0.7))
    ax.set_title(f"{img_name}\n{W}x{H}", fontsize=10)
    ax.axis('off')
plt.suptitle("YOLO labels visualized", fontsize=16, fontweight='bold')
plt.tight_layout()
plt.show()
Now that we can see the YOLO labels are positioned correctly, we can start training the object detector.
model = YOLO('yolov5s.pt')
print("using gpu:", torch.cuda.is_available())
results = model.train(
    data='simpsons.yaml',
    epochs=20,
    imgsz=416,
    batch=4,
    name='simpsons_yolo_v1',
    patience=10,
    device='cuda'
)
PRO TIP Replace 'model=yolov5s.pt' with new 'model=yolov5su.pt'. YOLOv5 'u' models are trained with https://github.com/ultralytics/ultralytics and feature improved performance vs standard YOLOv5 models trained with https://github.com/ultralytics/yolov5. using gpu: True Ultralytics 8.3.234 Python-3.13.5 torch-2.9.1+cu130 CUDA:0 (NVIDIA GeForce RTX 3060 Ti, 8191MiB) engine\trainer: agnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=4, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=simpsons.yaml, degrees=0.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=20, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=416, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov5s.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=simpsons_yolo_v115, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, overlap_mask=True, patience=10, perspective=0.0, plots=True, pose=12.0, pretrained=True, profile=False, project=None, rect=False, resume=False, retina_masks=False, save=True, save_conf=False, save_crop=False, save_dir=E:\Code\ML\homiedar\runs\detect\simpsons_yolo_v115, save_frames=False, save_json=False, save_period=-1, save_txt=False, scale=0.5, seed=0, shear=0.0, show=False, show_boxes=True, show_conf=True, show_labels=True, simplify=True, single_cls=False, source=None, split=val, stream_buffer=False, task=detect, time=None, tracker=botsort.yaml, translate=0.1, val=True, verbose=True, vid_stride=1, visualize=False, warmup_bias_lr=0.1, warmup_epochs=3.0, warmup_momentum=0.8, weight_decay=0.0005, workers=8, workspace=None Overriding model.yaml nc=80 with nc=2 from n params module arguments 0 -1 1 3520 ultralytics.nn.modules.conv.Conv [3, 32, 6, 2, 2] 1 -1 1 18560 ultralytics.nn.modules.conv.Conv [32, 64, 3, 2] 2 -1 1 18816 ultralytics.nn.modules.block.C3 [64, 64, 1] 3 -1 1 73984 ultralytics.nn.modules.conv.Conv [64, 128, 3, 2] 4 -1 2 115712 ultralytics.nn.modules.block.C3 [128, 128, 2] 5 -1 1 295424 ultralytics.nn.modules.conv.Conv [128, 256, 3, 2] 6 -1 3 625152 ultralytics.nn.modules.block.C3 [256, 256, 3] 7 -1 1 1180672 ultralytics.nn.modules.conv.Conv [256, 512, 3, 2] 8 -1 1 1182720 ultralytics.nn.modules.block.C3 [512, 512, 1] 9 -1 1 656896 ultralytics.nn.modules.block.SPPF [512, 512, 5] 10 -1 1 131584 ultralytics.nn.modules.conv.Conv [512, 256, 1, 1] 11 -1 1 0 torch.nn.modules.upsampling.Upsample [None, 2, 'nearest'] 12 [-1, 6] 1 0 ultralytics.nn.modules.conv.Concat [1] 13 -1 1 361984 ultralytics.nn.modules.block.C3 [512, 256, 1, False] 14 -1 1 33024 ultralytics.nn.modules.conv.Conv [256, 128, 1, 1] 15 -1 1 0 torch.nn.modules.upsampling.Upsample [None, 2, 'nearest'] 16 [-1, 4] 1 0 ultralytics.nn.modules.conv.Concat [1] 17 -1 1 90880 ultralytics.nn.modules.block.C3 [256, 128, 1, False] 18 -1 1 147712 ultralytics.nn.modules.conv.Conv [128, 128, 3, 2] 19 [-1, 14] 1 0 ultralytics.nn.modules.conv.Concat [1] 20 -1 1 296448 ultralytics.nn.modules.block.C3 [256, 256, 1, False] 21 -1 1 590336 ultralytics.nn.modules.conv.Conv [256, 256, 3, 2] 22 [-1, 10] 1 0 ultralytics.nn.modules.conv.Concat [1] 23 -1 1 1182720 ultralytics.nn.modules.block.C3 [512, 512, 1, False] 24 [17, 20, 23] 1 2116822 
ultralytics.nn.modules.head.Detect [2, [128, 256, 512]] YOLOv5s summary: 153 layers, 9,122,966 parameters, 9,122,950 gradients, 24.0 GFLOPs Transferred 421/427 items from pretrained weights Freezing layer 'model.24.dfl.conv.weight' AMP: running Automatic Mixed Precision (AMP) checks... AMP: checks passed train: Fast image access (ping: 0.20.1 ms, read: 33.911.6 MB/s, size: 25.9 KB) train: Scanning E:\Code\ML\homiedar\dataset\labels\train.cache... 800 images, 0 backgrounds, 0 corrupt: 100% ━━━━━━━━━━━━ 800/800 1.8Mit/s 0.0s0s val: Fast image access (ping: 1.01.0 ms, read: 27.115.5 MB/s, size: 24.7 KB) val: Scanning E:\Code\ML\homiedar\dataset\labels\val.cache... 200 images, 0 backgrounds, 0 corrupt: 100% ━━━━━━━━━━━━ 200/200 114.7Kit/s 0.0s Plotting labels to E:\Code\ML\homiedar\runs\detect\simpsons_yolo_v115\labels.jpg... optimizer: 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... optimizer: AdamW(lr=0.001667, momentum=0.9) with parameter groups 69 weight(decay=0.0), 76 weight(decay=0.0005), 75 bias(decay=0.0) Image sizes 416 train, 416 val Using 8 dataloader workers Logging results to E:\Code\ML\homiedar\runs\detect\simpsons_yolo_v115 Starting training for 20 epochs... Epoch GPU_mem box_loss cls_loss dfl_loss Instances Size 1/20 0.561G 1.701 2.198 1.657 8 416: 100% ━━━━━━━━━━━━ 200/200 9.6it/s 20.7s<0.2s Class Images Instances Box(P R mAP50 mAP50-95): 100% ━━━━━━━━━━━━ 25/25 12.2it/s 2.1s0.1s all 200 200 0.283 0.675 0.477 0.208 Epoch GPU_mem box_loss cls_loss dfl_loss Instances Size 2/20 0.766G 1.748 1.992 1.691 13 416: 100% ━━━━━━━━━━━━ 200/200 11.1it/s 18.0s<0.1s Class Images Instances Box(P R mAP50 mAP50-95): 100% ━━━━━━━━━━━━ 25/25 15.8it/s 1.6s0.1s all 200 200 0.32 0.605 0.523 0.188 Epoch GPU_mem box_loss cls_loss dfl_loss Instances Size 3/20 0.766G 1.775 1.93 1.661 13 416: 100% ━━━━━━━━━━━━ 200/200 11.3it/s 17.7s<0.1s Class Images Instances Box(P R mAP50 mAP50-95): 100% ━━━━━━━━━━━━ 25/25 16.8it/s 1.5s0.1s all 200 200 0.516 0.647 0.596 0.246 Epoch GPU_mem box_loss cls_loss dfl_loss Instances Size 4/20 0.766G 1.788 1.923 1.701 9 416: 100% ━━━━━━━━━━━━ 200/200 11.5it/s 17.4s<0.1s Class Images Instances Box(P R mAP50 mAP50-95): 100% ━━━━━━━━━━━━ 25/25 16.7it/s 1.5s0.1s all 200 200 0.622 0.633 0.628 0.26 Epoch GPU_mem box_loss cls_loss dfl_loss Instances Size 5/20 0.766G 1.781 1.927 1.707 11 416: 100% ━━━━━━━━━━━━ 200/200 11.5it/s 17.4s<0.1s Class Images Instances Box(P R mAP50 mAP50-95): 100% ━━━━━━━━━━━━ 25/25 16.8it/s 1.5s0.1s all 200 200 0.625 0.682 0.67 0.261 Epoch GPU_mem box_loss cls_loss dfl_loss Instances Size 6/20 0.766G 1.701 1.808 1.63 10 416: 100% ━━━━━━━━━━━━ 200/200 11.4it/s 17.6s<0.1s Class Images Instances Box(P R mAP50 mAP50-95): 100% ━━━━━━━━━━━━ 25/25 16.9it/s 1.5s0.1s all 200 200 0.648 0.675 0.723 0.321 Epoch GPU_mem box_loss cls_loss dfl_loss Instances Size 7/20 0.766G 1.707 1.779 1.671 11 416: 100% ━━━━━━━━━━━━ 200/200 11.5it/s 17.4s<0.1s Class Images Instances Box(P R mAP50 mAP50-95): 100% ━━━━━━━━━━━━ 25/25 16.4it/s 1.5s0.1s all 200 200 0.71 0.715 0.764 0.334 Epoch GPU_mem box_loss cls_loss dfl_loss Instances Size 8/20 0.766G 1.695 1.775 1.637 8 416: 100% ━━━━━━━━━━━━ 200/200 11.6it/s 17.2s<0.1s Class Images Instances Box(P R mAP50 mAP50-95): 100% ━━━━━━━━━━━━ 25/25 16.9it/s 1.5s0.1s all 200 200 0.745 0.647 0.737 0.341 Epoch GPU_mem box_loss cls_loss dfl_loss Instances Size 9/20 0.766G 1.644 1.706 1.595 7 416: 100% ━━━━━━━━━━━━ 200/200 11.4it/s 17.5s<0.1s Class Images 
Instances Box(P R mAP50 mAP50-95): 100% ━━━━━━━━━━━━ 25/25 16.7it/s 1.5s0.1s all 200 200 0.637 0.665 0.68 0.301 Epoch GPU_mem box_loss cls_loss dfl_loss Instances Size 10/20 0.766G 1.61 1.677 1.588 10 416: 100% ━━━━━━━━━━━━ 200/200 11.6it/s 17.3s<0.1s Class Images Instances Box(P R mAP50 mAP50-95): 100% ━━━━━━━━━━━━ 25/25 16.5it/s 1.5s0.1s all 200 200 0.643 0.658 0.706 0.322 Closing dataloader mosaic Epoch GPU_mem box_loss cls_loss dfl_loss Instances Size 11/20 0.766G 1.706 1.562 1.694 4 416: 100% ━━━━━━━━━━━━ 200/200 11.5it/s 17.5s<0.1s Class Images Instances Box(P R mAP50 mAP50-95): 100% ━━━━━━━━━━━━ 25/25 16.8it/s 1.5s0.1s all 200 200 0.719 0.677 0.743 0.327 Epoch GPU_mem box_loss cls_loss dfl_loss Instances Size 12/20 0.766G 1.652 1.476 1.653 4 416: 100% ━━━━━━━━━━━━ 200/200 11.6it/s 17.2s<0.1s Class Images Instances Box(P R mAP50 mAP50-95): 100% ━━━━━━━━━━━━ 25/25 16.2it/s 1.5s0.1s all 200 200 0.648 0.671 0.711 0.318 Epoch GPU_mem box_loss cls_loss dfl_loss Instances Size 13/20 0.766G 1.59 1.396 1.615 4 416: 100% ━━━━━━━━━━━━ 200/200 11.6it/s 17.3s<0.1s Class Images Instances Box(P R mAP50 mAP50-95): 100% ━━━━━━━━━━━━ 25/25 16.2it/s 1.5s0.1s all 200 200 0.734 0.74 0.817 0.395 Epoch GPU_mem box_loss cls_loss dfl_loss Instances Size 14/20 0.766G 1.566 1.344 1.6 4 416: 100% ━━━━━━━━━━━━ 200/200 11.6it/s 17.2s<0.1s Class Images Instances Box(P R mAP50 mAP50-95): 100% ━━━━━━━━━━━━ 25/25 16.4it/s 1.5s0.1s all 200 200 0.755 0.694 0.779 0.393 Epoch GPU_mem box_loss cls_loss dfl_loss Instances Size 15/20 0.768G 1.568 1.351 1.59 4 416: 100% ━━━━━━━━━━━━ 200/200 11.5it/s 17.4s<0.1s Class Images Instances Box(P R mAP50 mAP50-95): 100% ━━━━━━━━━━━━ 25/25 16.7it/s 1.5s0.1s all 200 200 0.806 0.723 0.797 0.392 Epoch GPU_mem box_loss cls_loss dfl_loss Instances Size 16/20 0.768G 1.541 1.29 1.579 4 416: 100% ━━━━━━━━━━━━ 200/200 11.6it/s 17.2s<0.1s Class Images Instances Box(P R mAP50 mAP50-95): 100% ━━━━━━━━━━━━ 25/25 16.0it/s 1.6s0.1s all 200 200 0.718 0.788 0.808 0.403 Epoch GPU_mem box_loss cls_loss dfl_loss Instances Size 17/20 0.768G 1.516 1.21 1.53 4 416: 100% ━━━━━━━━━━━━ 200/200 11.6it/s 17.3s<0.1s Class Images Instances Box(P R mAP50 mAP50-95): 100% ━━━━━━━━━━━━ 25/25 16.8it/s 1.5s0.1s all 200 200 0.761 0.748 0.814 0.401 Epoch GPU_mem box_loss cls_loss dfl_loss Instances Size 18/20 0.768G 1.492 1.199 1.522 4 416: 100% ━━━━━━━━━━━━ 200/200 11.5it/s 17.4s<0.1s Class Images Instances Box(P R mAP50 mAP50-95): 100% ━━━━━━━━━━━━ 25/25 16.3it/s 1.5s0.1s all 200 200 0.778 0.733 0.814 0.411 Epoch GPU_mem box_loss cls_loss dfl_loss Instances Size 19/20 0.768G 1.456 1.175 1.504 4 416: 100% ━━━━━━━━━━━━ 200/200 11.6it/s 17.3s<0.1s Class Images Instances Box(P R mAP50 mAP50-95): 100% ━━━━━━━━━━━━ 25/25 16.9it/s 1.5s0.1s all 200 200 0.789 0.775 0.829 0.413 Epoch GPU_mem box_loss cls_loss dfl_loss Instances Size 20/20 0.768G 1.451 1.146 1.498 4 416: 100% ━━━━━━━━━━━━ 200/200 11.6it/s 17.2s<0.1s Class Images Instances Box(P R mAP50 mAP50-95): 100% ━━━━━━━━━━━━ 25/25 16.9it/s 1.5s0.1s all 200 200 0.8 0.75 0.831 0.421 20 epochs completed in 0.113 hours. Optimizer stripped from E:\Code\ML\homiedar\runs\detect\simpsons_yolo_v115\weights\last.pt, 18.5MB Optimizer stripped from E:\Code\ML\homiedar\runs\detect\simpsons_yolo_v115\weights\best.pt, 18.5MB Validating E:\Code\ML\homiedar\runs\detect\simpsons_yolo_v115\weights\best.pt... 
Ultralytics 8.3.234  Python-3.13.5  torch-2.9.1+cu130  CUDA:0 (NVIDIA GeForce RTX 3060 Ti, 8191MiB)
YOLOv5s summary (fused): 84 layers, 9,112,310 parameters, 0 gradients, 23.8 GFLOPs

Class           Images  Instances  Box(P    R      mAP50  mAP50-95)
all             200     200        0.800    0.752  0.831  0.421
homer_simpson   100     100        0.841    0.840  0.907  0.456
not_homer       100     100        0.759    0.663  0.756  0.386

Speed: 0.1ms preprocess, 1.9ms inference, 0.0ms loss, 1.2ms postprocess per image
Results saved to E:\Code\ML\homiedar\runs\detect\simpsons_yolo_v115
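If we want these numbers programmatically instead of scraping the console, Ultralytics exposes them through val(); a minimal sketch using the trained model object:
metrics = model.val(data='simpsons.yaml', imgsz=416)
print("mAP50:", metrics.box.map50)   # mean AP at IoU 0.50
print("mAP50-95:", metrics.box.map)  # mean AP averaged over IoU 0.50-0.95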
model = YOLO('runs/detect/simpsons_yolo_v115/weights/best.pt')
results = model.predict(
    source='dataset/images/val',
    imgsz=416,
    conf=0.25,
    device='cuda'
)
image 1/200 E:\Code\ML\homiedar\dataset\images\val\val_0000.jpg: 416x288 3 not_homers, 77.5ms image 2/200 E:\Code\ML\homiedar\dataset\images\val\val_0001.jpg: 320x416 1 not_homer, 73.2ms image 3/200 E:\Code\ML\homiedar\dataset\images\val\val_0002.jpg: 416x416 1 homer_simpson, 11.4ms image 4/200 E:\Code\ML\homiedar\dataset\images\val\val_0003.jpg: 320x416 1 homer_simpson, 12.2ms image 5/200 E:\Code\ML\homiedar\dataset\images\val\val_0004.jpg: 320x416 1 homer_simpson, 12.5ms image 6/200 E:\Code\ML\homiedar\dataset\images\val\val_0005.jpg: 416x384 2 not_homers, 74.5ms image 7/200 E:\Code\ML\homiedar\dataset\images\val\val_0006.jpg: 416x288 2 homer_simpsons, 11.2ms image 8/200 E:\Code\ML\homiedar\dataset\images\val\val_0007.jpg: 320x416 1 homer_simpson, 11.6ms image 9/200 E:\Code\ML\homiedar\dataset\images\val\val_0008.jpg: 320x416 1 homer_simpson, 11.6ms image 10/200 E:\Code\ML\homiedar\dataset\images\val\val_0009.jpg: 320x416 1 homer_simpson, 14.0ms image 11/200 E:\Code\ML\homiedar\dataset\images\val\val_0010.jpg: 256x416 1 not_homer, 76.9ms image 12/200 E:\Code\ML\homiedar\dataset\images\val\val_0011.jpg: 320x416 1 homer_simpson, 12.4ms image 13/200 E:\Code\ML\homiedar\dataset\images\val\val_0012.jpg: 320x416 1 homer_simpson, 11.6ms image 14/200 E:\Code\ML\homiedar\dataset\images\val\val_0013.jpg: 320x416 1 homer_simpson, 11.3ms image 15/200 E:\Code\ML\homiedar\dataset\images\val\val_0014.jpg: 320x416 1 not_homer, 11.4ms image 16/200 E:\Code\ML\homiedar\dataset\images\val\val_0015.jpg: 416x288 2 homer_simpsons, 14.0ms image 17/200 E:\Code\ML\homiedar\dataset\images\val\val_0016.jpg: 320x416 1 not_homer, 17.0ms image 18/200 E:\Code\ML\homiedar\dataset\images\val\val_0017.jpg: 416x288 1 not_homer, 13.9ms image 19/200 E:\Code\ML\homiedar\dataset\images\val\val_0018.jpg: 256x416 1 not_homer, 14.0ms image 20/200 E:\Code\ML\homiedar\dataset\images\val\val_0019.jpg: 416x288 1 not_homer, 12.7ms image 21/200 E:\Code\ML\homiedar\dataset\images\val\val_0020.jpg: 256x416 1 not_homer, 14.6ms image 22/200 E:\Code\ML\homiedar\dataset\images\val\val_0021.jpg: 320x416 2 not_homers, 17.3ms image 23/200 E:\Code\ML\homiedar\dataset\images\val\val_0022.jpg: 416x288 1 homer_simpson, 12.5ms image 24/200 E:\Code\ML\homiedar\dataset\images\val\val_0023.jpg: 320x416 1 homer_simpson, 14.9ms image 25/200 E:\Code\ML\homiedar\dataset\images\val\val_0024.jpg: 416x288 1 homer_simpson, 16.7ms image 26/200 E:\Code\ML\homiedar\dataset\images\val\val_0025.jpg: 416x288 2 homer_simpsons, 12.1ms image 27/200 E:\Code\ML\homiedar\dataset\images\val\val_0026.jpg: 256x416 1 not_homer, 13.0ms image 28/200 E:\Code\ML\homiedar\dataset\images\val\val_0027.jpg: 256x416 2 homer_simpsons, 11.7ms image 29/200 E:\Code\ML\homiedar\dataset\images\val\val_0028.jpg: 416x288 1 not_homer, 14.6ms image 30/200 E:\Code\ML\homiedar\dataset\images\val\val_0029.jpg: 416x288 1 not_homer, 11.1ms image 31/200 E:\Code\ML\homiedar\dataset\images\val\val_0030.jpg: 320x416 1 homer_simpson, 12.4ms image 32/200 E:\Code\ML\homiedar\dataset\images\val\val_0031.jpg: 416x288 1 homer_simpson, 12.1ms image 33/200 E:\Code\ML\homiedar\dataset\images\val\val_0032.jpg: 416x288 1 homer_simpson, 1 not_homer, 11.9ms image 34/200 E:\Code\ML\homiedar\dataset\images\val\val_0033.jpg: 320x416 1 not_homer, 13.5ms image 35/200 E:\Code\ML\homiedar\dataset\images\val\val_0034.jpg: 320x416 1 not_homer, 11.2ms image 36/200 E:\Code\ML\homiedar\dataset\images\val\val_0035.jpg: 320x416 2 homer_simpsons, 20.3ms image 37/200 E:\Code\ML\homiedar\dataset\images\val\val_0036.jpg: 320x416 (no 
detections), 17.4ms image 38/200 E:\Code\ML\homiedar\dataset\images\val\val_0037.jpg: 416x288 2 homer_simpsons, 12.5ms image 39/200 E:\Code\ML\homiedar\dataset\images\val\val_0038.jpg: 416x288 (no detections), 11.6ms image 40/200 E:\Code\ML\homiedar\dataset\images\val\val_0039.jpg: 320x416 1 homer_simpson, 12.4ms image 41/200 E:\Code\ML\homiedar\dataset\images\val\val_0040.jpg: 320x416 2 not_homers, 11.3ms image 42/200 E:\Code\ML\homiedar\dataset\images\val\val_0041.jpg: 320x416 1 homer_simpson, 11.5ms image 43/200 E:\Code\ML\homiedar\dataset\images\val\val_0042.jpg: 416x288 1 homer_simpson, 14.7ms image 44/200 E:\Code\ML\homiedar\dataset\images\val\val_0043.jpg: 320x416 1 homer_simpson, 10.9ms image 45/200 E:\Code\ML\homiedar\dataset\images\val\val_0044.jpg: 416x288 2 not_homers, 10.6ms image 46/200 E:\Code\ML\homiedar\dataset\images\val\val_0045.jpg: 320x416 1 homer_simpson, 11.0ms image 47/200 E:\Code\ML\homiedar\dataset\images\val\val_0046.jpg: 416x288 1 homer_simpson, 10.5ms image 48/200 E:\Code\ML\homiedar\dataset\images\val\val_0047.jpg: 416x288 2 not_homers, 10.0ms image 49/200 E:\Code\ML\homiedar\dataset\images\val\val_0048.jpg: 320x416 1 homer_simpson, 10.6ms image 50/200 E:\Code\ML\homiedar\dataset\images\val\val_0049.jpg: 320x416 1 not_homer, 10.0ms image 51/200 E:\Code\ML\homiedar\dataset\images\val\val_0050.jpg: 320x416 2 homer_simpsons, 9.9ms image 52/200 E:\Code\ML\homiedar\dataset\images\val\val_0051.jpg: 320x416 1 homer_simpson, 10.1ms image 53/200 E:\Code\ML\homiedar\dataset\images\val\val_0052.jpg: 320x416 1 homer_simpson, 9.8ms image 54/200 E:\Code\ML\homiedar\dataset\images\val\val_0053.jpg: 320x416 1 homer_simpson, 10.2ms image 55/200 E:\Code\ML\homiedar\dataset\images\val\val_0054.jpg: 416x320 2 not_homers, 59.4ms image 56/200 E:\Code\ML\homiedar\dataset\images\val\val_0055.jpg: 256x416 1 homer_simpson, 10.6ms image 57/200 E:\Code\ML\homiedar\dataset\images\val\val_0056.jpg: 416x288 2 not_homers, 10.6ms image 58/200 E:\Code\ML\homiedar\dataset\images\val\val_0057.jpg: 416x288 2 homer_simpsons, 10.1ms image 59/200 E:\Code\ML\homiedar\dataset\images\val\val_0058.jpg: 416x384 1 not_homer, 10.4ms image 60/200 E:\Code\ML\homiedar\dataset\images\val\val_0059.jpg: 320x416 2 not_homers, 10.7ms image 61/200 E:\Code\ML\homiedar\dataset\images\val\val_0060.jpg: 320x416 1 homer_simpson, 17.1ms image 62/200 E:\Code\ML\homiedar\dataset\images\val\val_0061.jpg: 320x416 1 not_homer, 10.9ms image 63/200 E:\Code\ML\homiedar\dataset\images\val\val_0062.jpg: 416x288 1 not_homer, 11.2ms image 64/200 E:\Code\ML\homiedar\dataset\images\val\val_0063.jpg: 416x288 1 homer_simpson, 13.2ms image 65/200 E:\Code\ML\homiedar\dataset\images\val\val_0064.jpg: 416x288 2 homer_simpsons, 10.3ms image 66/200 E:\Code\ML\homiedar\dataset\images\val\val_0065.jpg: 416x288 3 not_homers, 9.9ms image 67/200 E:\Code\ML\homiedar\dataset\images\val\val_0066.jpg: 320x416 1 not_homer, 12.0ms image 68/200 E:\Code\ML\homiedar\dataset\images\val\val_0067.jpg: 416x288 2 not_homers, 11.3ms image 69/200 E:\Code\ML\homiedar\dataset\images\val\val_0068.jpg: 416x288 2 homer_simpsons, 10.2ms image 70/200 E:\Code\ML\homiedar\dataset\images\val\val_0069.jpg: 416x288 2 homer_simpsons, 10.3ms image 71/200 E:\Code\ML\homiedar\dataset\images\val\val_0070.jpg: 416x288 1 not_homer, 10.1ms image 72/200 E:\Code\ML\homiedar\dataset\images\val\val_0071.jpg: 320x416 1 homer_simpson, 10.8ms image 73/200 E:\Code\ML\homiedar\dataset\images\val\val_0072.jpg: 416x288 2 homer_simpsons, 10.7ms image 74/200 
E:\Code\ML\homiedar\dataset\images\val\val_0073.jpg: 416x288 1 homer_simpson, 9.9ms image 75/200 E:\Code\ML\homiedar\dataset\images\val\val_0074.jpg: 320x416 (no detections), 10.3ms image 76/200 E:\Code\ML\homiedar\dataset\images\val\val_0075.jpg: 416x288 1 not_homer, 10.6ms image 77/200 E:\Code\ML\homiedar\dataset\images\val\val_0076.jpg: 320x416 1 not_homer, 16.7ms image 78/200 E:\Code\ML\homiedar\dataset\images\val\val_0077.jpg: 416x288 2 not_homers, 10.8ms image 79/200 E:\Code\ML\homiedar\dataset\images\val\val_0078.jpg: 416x384 1 not_homer, 10.5ms image 80/200 E:\Code\ML\homiedar\dataset\images\val\val_0079.jpg: 416x288 2 not_homers, 17.7ms image 81/200 E:\Code\ML\homiedar\dataset\images\val\val_0080.jpg: 416x288 1 homer_simpson, 9.9ms image 82/200 E:\Code\ML\homiedar\dataset\images\val\val_0081.jpg: 416x288 2 not_homers, 10.1ms image 83/200 E:\Code\ML\homiedar\dataset\images\val\val_0082.jpg: 416x288 1 not_homer, 10.3ms image 84/200 E:\Code\ML\homiedar\dataset\images\val\val_0083.jpg: 320x416 1 not_homer, 10.6ms image 85/200 E:\Code\ML\homiedar\dataset\images\val\val_0084.jpg: 320x416 1 not_homer, 10.0ms image 86/200 E:\Code\ML\homiedar\dataset\images\val\val_0085.jpg: 320x416 1 homer_simpson, 13.4ms image 87/200 E:\Code\ML\homiedar\dataset\images\val\val_0086.jpg: 320x416 2 homer_simpsons, 10.2ms image 88/200 E:\Code\ML\homiedar\dataset\images\val\val_0087.jpg: 320x416 1 not_homer, 10.5ms image 89/200 E:\Code\ML\homiedar\dataset\images\val\val_0088.jpg: 320x416 1 homer_simpson, 10.4ms image 90/200 E:\Code\ML\homiedar\dataset\images\val\val_0089.jpg: 320x416 1 homer_simpson, 12.8ms image 91/200 E:\Code\ML\homiedar\dataset\images\val\val_0090.jpg: 416x384 1 not_homer, 11.2ms image 92/200 E:\Code\ML\homiedar\dataset\images\val\val_0091.jpg: 416x288 1 not_homer, 11.5ms image 93/200 E:\Code\ML\homiedar\dataset\images\val\val_0092.jpg: 416x288 1 not_homer, 11.2ms image 94/200 E:\Code\ML\homiedar\dataset\images\val\val_0093.jpg: 416x288 2 not_homers, 10.0ms image 95/200 E:\Code\ML\homiedar\dataset\images\val\val_0094.jpg: 416x288 2 homer_simpsons, 10.1ms image 96/200 E:\Code\ML\homiedar\dataset\images\val\val_0095.jpg: 416x288 1 not_homer, 10.4ms image 97/200 E:\Code\ML\homiedar\dataset\images\val\val_0096.jpg: 320x416 1 homer_simpson, 10.5ms image 98/200 E:\Code\ML\homiedar\dataset\images\val\val_0097.jpg: 256x416 1 homer_simpson, 10.7ms image 99/200 E:\Code\ML\homiedar\dataset\images\val\val_0098.jpg: 320x416 1 not_homer, 17.7ms image 100/200 E:\Code\ML\homiedar\dataset\images\val\val_0099.jpg: 320x416 2 not_homers, 10.1ms image 101/200 E:\Code\ML\homiedar\dataset\images\val\val_0100.jpg: 416x288 2 homer_simpsons, 11.4ms image 102/200 E:\Code\ML\homiedar\dataset\images\val\val_0101.jpg: 416x288 1 not_homer, 16.9ms image 103/200 E:\Code\ML\homiedar\dataset\images\val\val_0102.jpg: 416x320 1 homer_simpson, 18.1ms image 104/200 E:\Code\ML\homiedar\dataset\images\val\val_0103.jpg: 320x416 2 homer_simpsons, 10.8ms image 105/200 E:\Code\ML\homiedar\dataset\images\val\val_0104.jpg: 416x288 1 not_homer, 11.0ms image 106/200 E:\Code\ML\homiedar\dataset\images\val\val_0105.jpg: 416x288 1 homer_simpson, 1 not_homer, 9.8ms image 107/200 E:\Code\ML\homiedar\dataset\images\val\val_0106.jpg: 416x288 2 not_homers, 10.0ms image 108/200 E:\Code\ML\homiedar\dataset\images\val\val_0107.jpg: 320x416 1 homer_simpson, 11.0ms image 109/200 E:\Code\ML\homiedar\dataset\images\val\val_0108.jpg: 320x416 1 homer_simpson, 10.0ms image 110/200 E:\Code\ML\homiedar\dataset\images\val\val_0109.jpg: 416x288 2 not_homers, 
11.9ms image 111/200 E:\Code\ML\homiedar\dataset\images\val\val_0110.jpg: 416x288 2 not_homers, 15.9ms image 112/200 E:\Code\ML\homiedar\dataset\images\val\val_0111.jpg: 320x416 2 not_homers, 11.6ms image 113/200 E:\Code\ML\homiedar\dataset\images\val\val_0112.jpg: 416x288 (no detections), 11.2ms image 114/200 E:\Code\ML\homiedar\dataset\images\val\val_0113.jpg: 320x416 (no detections), 12.0ms image 115/200 E:\Code\ML\homiedar\dataset\images\val\val_0114.jpg: 320x416 2 homer_simpsons, 14.5ms image 116/200 E:\Code\ML\homiedar\dataset\images\val\val_0115.jpg: 320x416 2 not_homers, 11.3ms image 117/200 E:\Code\ML\homiedar\dataset\images\val\val_0116.jpg: 256x416 1 homer_simpson, 11.1ms image 118/200 E:\Code\ML\homiedar\dataset\images\val\val_0117.jpg: 256x416 2 not_homers, 9.8ms image 119/200 E:\Code\ML\homiedar\dataset\images\val\val_0118.jpg: 416x288 1 not_homer, 13.2ms image 120/200 E:\Code\ML\homiedar\dataset\images\val\val_0119.jpg: 320x416 1 not_homer, 14.2ms image 121/200 E:\Code\ML\homiedar\dataset\images\val\val_0120.jpg: 416x384 1 homer_simpson, 11.1ms image 122/200 E:\Code\ML\homiedar\dataset\images\val\val_0121.jpg: 416x288 2 not_homers, 10.6ms image 123/200 E:\Code\ML\homiedar\dataset\images\val\val_0122.jpg: 416x288 1 homer_simpson, 9.7ms image 124/200 E:\Code\ML\homiedar\dataset\images\val\val_0123.jpg: 416x288 1 homer_simpson, 15.6ms image 125/200 E:\Code\ML\homiedar\dataset\images\val\val_0124.jpg: 320x416 1 homer_simpson, 10.3ms image 126/200 E:\Code\ML\homiedar\dataset\images\val\val_0125.jpg: 416x288 1 not_homer, 16.0ms image 127/200 E:\Code\ML\homiedar\dataset\images\val\val_0126.jpg: 416x288 1 homer_simpson, 10.0ms image 128/200 E:\Code\ML\homiedar\dataset\images\val\val_0127.jpg: 320x416 2 homer_simpsons, 10.5ms image 129/200 E:\Code\ML\homiedar\dataset\images\val\val_0128.jpg: 416x384 2 homer_simpsons, 10.4ms image 130/200 E:\Code\ML\homiedar\dataset\images\val\val_0129.jpg: 416x288 2 homer_simpsons, 10.8ms image 131/200 E:\Code\ML\homiedar\dataset\images\val\val_0130.jpg: 416x288 2 not_homers, 11.6ms image 132/200 E:\Code\ML\homiedar\dataset\images\val\val_0131.jpg: 320x416 1 homer_simpson, 11.0ms image 133/200 E:\Code\ML\homiedar\dataset\images\val\val_0132.jpg: 416x288 2 not_homers, 10.7ms image 134/200 E:\Code\ML\homiedar\dataset\images\val\val_0133.jpg: 416x288 2 homer_simpsons, 10.4ms image 135/200 E:\Code\ML\homiedar\dataset\images\val\val_0134.jpg: 320x416 1 not_homer, 11.6ms image 136/200 E:\Code\ML\homiedar\dataset\images\val\val_0135.jpg: 416x384 1 not_homer, 11.0ms image 137/200 E:\Code\ML\homiedar\dataset\images\val\val_0136.jpg: 416x384 2 not_homers, 10.0ms image 138/200 E:\Code\ML\homiedar\dataset\images\val\val_0137.jpg: 416x288 2 homer_simpsons, 10.5ms image 139/200 E:\Code\ML\homiedar\dataset\images\val\val_0138.jpg: 416x384 1 not_homer, 13.8ms image 140/200 E:\Code\ML\homiedar\dataset\images\val\val_0139.jpg: 416x288 1 homer_simpson, 10.7ms image 141/200 E:\Code\ML\homiedar\dataset\images\val\val_0140.jpg: 256x416 1 homer_simpson, 10.8ms image 142/200 E:\Code\ML\homiedar\dataset\images\val\val_0141.jpg: 320x416 1 homer_simpson, 11.3ms image 143/200 E:\Code\ML\homiedar\dataset\images\val\val_0142.jpg: 416x288 1 not_homer, 10.6ms image 144/200 E:\Code\ML\homiedar\dataset\images\val\val_0143.jpg: 320x416 1 homer_simpson, 10.7ms image 145/200 E:\Code\ML\homiedar\dataset\images\val\val_0144.jpg: 416x384 1 not_homer, 11.8ms image 146/200 E:\Code\ML\homiedar\dataset\images\val\val_0145.jpg: 320x416 2 not_homers, 10.8ms image 147/200 
E:\Code\ML\homiedar\dataset\images\val\val_0146.jpg: 416x288 1 homer_simpson, 17.6ms image 148/200 E:\Code\ML\homiedar\dataset\images\val\val_0147.jpg: 320x416 2 homer_simpsons, 10.9ms image 149/200 E:\Code\ML\homiedar\dataset\images\val\val_0148.jpg: 320x416 1 not_homer, 12.5ms image 150/200 E:\Code\ML\homiedar\dataset\images\val\val_0149.jpg: 320x416 1 homer_simpson, 17.0ms image 151/200 E:\Code\ML\homiedar\dataset\images\val\val_0150.jpg: 320x416 3 not_homers, 14.4ms image 152/200 E:\Code\ML\homiedar\dataset\images\val\val_0151.jpg: 320x416 1 not_homer, 9.9ms image 153/200 E:\Code\ML\homiedar\dataset\images\val\val_0152.jpg: 320x416 2 homer_simpsons, 10.3ms image 154/200 E:\Code\ML\homiedar\dataset\images\val\val_0153.jpg: 320x416 1 not_homer, 10.4ms image 155/200 E:\Code\ML\homiedar\dataset\images\val\val_0154.jpg: 416x384 3 not_homers, 11.0ms image 156/200 E:\Code\ML\homiedar\dataset\images\val\val_0155.jpg: 320x416 2 homer_simpsons, 10.8ms image 157/200 E:\Code\ML\homiedar\dataset\images\val\val_0156.jpg: 416x288 2 not_homers, 10.9ms image 158/200 E:\Code\ML\homiedar\dataset\images\val\val_0157.jpg: 416x288 (no detections), 10.4ms image 159/200 E:\Code\ML\homiedar\dataset\images\val\val_0158.jpg: 320x416 1 homer_simpson, 10.8ms image 160/200 E:\Code\ML\homiedar\dataset\images\val\val_0159.jpg: 416x288 (no detections), 10.9ms image 161/200 E:\Code\ML\homiedar\dataset\images\val\val_0160.jpg: 320x416 3 homer_simpsons, 18.5ms image 162/200 E:\Code\ML\homiedar\dataset\images\val\val_0161.jpg: 416x288 1 homer_simpson, 10.7ms image 163/200 E:\Code\ML\homiedar\dataset\images\val\val_0162.jpg: 416x288 1 not_homer, 9.8ms image 164/200 E:\Code\ML\homiedar\dataset\images\val\val_0163.jpg: 320x416 2 not_homers, 10.8ms image 165/200 E:\Code\ML\homiedar\dataset\images\val\val_0164.jpg: 416x288 1 homer_simpson, 10.7ms image 166/200 E:\Code\ML\homiedar\dataset\images\val\val_0165.jpg: 416x288 1 homer_simpson, 10.1ms image 167/200 E:\Code\ML\homiedar\dataset\images\val\val_0166.jpg: 416x288 2 homer_simpsons, 10.8ms image 168/200 E:\Code\ML\homiedar\dataset\images\val\val_0167.jpg: 416x288 1 not_homer, 10.0ms image 169/200 E:\Code\ML\homiedar\dataset\images\val\val_0168.jpg: 320x416 1 homer_simpson, 13.8ms image 170/200 E:\Code\ML\homiedar\dataset\images\val\val_0169.jpg: 416x288 1 homer_simpson, 11.2ms image 171/200 E:\Code\ML\homiedar\dataset\images\val\val_0170.jpg: 320x416 1 not_homer, 10.6ms image 172/200 E:\Code\ML\homiedar\dataset\images\val\val_0171.jpg: 320x416 1 homer_simpson, 10.5ms image 173/200 E:\Code\ML\homiedar\dataset\images\val\val_0172.jpg: 416x288 1 homer_simpson, 10.7ms image 174/200 E:\Code\ML\homiedar\dataset\images\val\val_0173.jpg: 416x288 1 not_homer, 10.0ms image 175/200 E:\Code\ML\homiedar\dataset\images\val\val_0174.jpg: 416x288 1 homer_simpson, 15.6ms image 176/200 E:\Code\ML\homiedar\dataset\images\val\val_0175.jpg: 416x384 1 homer_simpson, 10.8ms image 177/200 E:\Code\ML\homiedar\dataset\images\val\val_0176.jpg: 416x288 1 homer_simpson, 14.9ms image 178/200 E:\Code\ML\homiedar\dataset\images\val\val_0177.jpg: 416x288 2 homer_simpsons, 10.7ms image 179/200 E:\Code\ML\homiedar\dataset\images\val\val_0178.jpg: 416x288 1 not_homer, 11.2ms image 180/200 E:\Code\ML\homiedar\dataset\images\val\val_0179.jpg: 256x416 1 homer_simpson, 11.5ms image 181/200 E:\Code\ML\homiedar\dataset\images\val\val_0180.jpg: 416x384 1 not_homer, 10.4ms image 182/200 E:\Code\ML\homiedar\dataset\images\val\val_0181.jpg: 320x416 2 not_homers, 10.7ms image 183/200 
E:\Code\ML\homiedar\dataset\images\val\val_0182.jpg: 416x288 1 homer_simpson, 10.3ms image 184/200 E:\Code\ML\homiedar\dataset\images\val\val_0183.jpg: 416x288 1 not_homer, 10.1ms image 185/200 E:\Code\ML\homiedar\dataset\images\val\val_0184.jpg: 256x416 1 not_homer, 10.5ms image 186/200 E:\Code\ML\homiedar\dataset\images\val\val_0185.jpg: 320x416 1 not_homer, 10.7ms image 187/200 E:\Code\ML\homiedar\dataset\images\val\val_0186.jpg: 320x416 1 homer_simpson, 9.7ms image 188/200 E:\Code\ML\homiedar\dataset\images\val\val_0187.jpg: 416x288 1 not_homer, 10.5ms image 189/200 E:\Code\ML\homiedar\dataset\images\val\val_0188.jpg: 320x416 2 not_homers, 10.6ms image 190/200 E:\Code\ML\homiedar\dataset\images\val\val_0189.jpg: 416x288 1 not_homer, 10.5ms image 191/200 E:\Code\ML\homiedar\dataset\images\val\val_0190.jpg: 416x288 1 not_homer, 9.8ms image 192/200 E:\Code\ML\homiedar\dataset\images\val\val_0191.jpg: 320x416 (no detections), 10.7ms image 193/200 E:\Code\ML\homiedar\dataset\images\val\val_0192.jpg: 416x288 1 not_homer, 11.4ms image 194/200 E:\Code\ML\homiedar\dataset\images\val\val_0193.jpg: 416x288 2 homer_simpsons, 9.9ms image 195/200 E:\Code\ML\homiedar\dataset\images\val\val_0194.jpg: 416x288 1 homer_simpson, 10.0ms image 196/200 E:\Code\ML\homiedar\dataset\images\val\val_0195.jpg: 416x288 2 homer_simpsons, 9.7ms image 197/200 E:\Code\ML\homiedar\dataset\images\val\val_0196.jpg: 416x288 1 not_homer, 16.3ms image 198/200 E:\Code\ML\homiedar\dataset\images\val\val_0197.jpg: 320x416 2 homer_simpsons, 11.2ms image 199/200 E:\Code\ML\homiedar\dataset\images\val\val_0198.jpg: 320x416 1 homer_simpson, 16.4ms image 200/200 E:\Code\ML\homiedar\dataset\images\val\val_0199.jpg: 416x288 1 not_homer, 10.8ms Speed: 0.9ms preprocess, 13.3ms inference, 2.5ms postprocess per image at shape (1, 3, 416, 288)
%matplotlib inline
fig, axes = plt.subplots(2, 2, figsize=(12, 12))
axes = axes.flatten()
random_results = random.sample(results, 4)
for ax, result in zip(axes, random_results):
    img_with_boxes = result.plot()  # returns a BGR image with the boxes drawn
    img_rgb = cv2.cvtColor(img_with_boxes, cv2.COLOR_BGR2RGB)
    ax.imshow(img_rgb)
    ax.axis('off')
    ax.set_title(os.path.basename(result.path), fontsize=12)  # basename handles Windows backslash paths
plt.tight_layout()
plt.show()
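Beyond spot-checking a few images, we can tally what the detector predicted per class across the whole validation set; a small sketch over the results list from predict() (model.names maps class ids to names):
from collections import Counter
detection_counts = Counter()
for result in results:
    for cls_id in result.boxes.cls.tolist():
        detection_counts[model.names[int(cls_id)]] += 1
print(detection_counts)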
import tensorflow as tf
import tensorflow_hub as hub
import numpy as np
print("Loading style transfer model...")
hub_model = hub.load('https://tfhub.dev/google/magenta/arbitrary-image-stylization-v1-256/2')
style_image_path = "image2.jpg"
style_image = cv2.imread(style_image_path)
style_image = cv2.cvtColor(style_image, cv2.COLOR_BGR2RGB)
style_image = tf.image.resize(style_image, (256, 256))
style_image = style_image / 255.0
style_image = tf.expand_dims(style_image, 0)
def gan_style_transfer(crop_img):
    # Convert BGR to RGB
    crop_rgb = cv2.cvtColor(crop_img, cv2.COLOR_BGR2RGB)
    # Prepare content image
    content_img = tf.image.resize(crop_rgb, (256, 256))
    content_img = content_img / 255.0
    content_img = tf.expand_dims(content_img, 0)
    # Apply style transfer
    stylized = hub_model(content_img, style_image)[0]
    # Resize back to original crop size
    h, w = crop_img.shape[:2]
    stylized = tf.image.resize(stylized[0], (h, w))
    stylized = (stylized.numpy() * 255).astype(np.uint8)
    # Convert back to BGR
    stylized_bgr = cv2.cvtColor(stylized, cv2.COLOR_RGB2BGR)
    return stylized_bgr
Loading style transfer model...
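Before wiring the helper into the detection loop, a quick smoke test on a single image (val_0000.jpg is simply the first validation image; any BGR array works):
test_img = cv2.imread("dataset/images/val/val_0000.jpg")  # BGR, as gan_style_transfer expects
stylized = gan_style_transfer(test_img)                   # stylize the whole frame as one "crop"
plt.imshow(cv2.cvtColor(stylized, cv2.COLOR_BGR2RGB))
plt.axis('off')
plt.show()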
import os
import cv2
import matplotlib.pyplot as plt
test_images = [f for f in os.listdir("dataset/images/val") if f.endswith(('.jpg', '.png', '.jpeg'))][:10]
composite_count = 0
for fname in test_images:
    input_path = os.path.join("dataset/images/val", fname)
    orig_img = cv2.imread(input_path)
    if orig_img is None:
        continue
    # YOLO inference
    results = model.predict(input_path, imgsz=640, conf=0.25, verbose=False)
    composite_img = orig_img.copy()
    detected_homer = False
    for result in results:
        boxes = result.boxes
        for box in boxes:
            cls_id = int(box.cls[0])
            if cls_id != 0:  # only stylize Homer detections (class 0)
                continue
            detected_homer = True
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            # Crop detection
            crop = orig_img[y1:y2, x1:x2].copy()
            if crop.size == 0:
                continue
            # GAN-style transfer
            stylized_crop = gan_style_transfer(crop)
            # Paste stylized crop back
            composite_img[y1:y2, x1:x2] = stylized_crop
            # Draw bounding box on composite
            cv2.rectangle(composite_img, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(composite_img, "Stylized Homer", (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
    if detected_homer:
        # Show original and composite side by side for every image with a Homer detection
        comparison = np.hstack([orig_img, composite_img])
        comparison_rgb = cv2.cvtColor(comparison, cv2.COLOR_BGR2RGB)
        plt.figure(figsize=(15, 7))
        plt.imshow(comparison_rgb)
        plt.title(f"Original (Left) vs Stylized (Right) - {fname}")
        plt.axis('off')
        plt.tight_layout()
        plt.show()